## Mortality rates by age group and year


# Prelims -----------------------------------------------------------------

## Remove every object from the current environment, then run the garbage
## collector, so the rest of the script starts from an empty workspace.
## NOTE(review): rm() does not detach packages. fread(), %>%, str_extract(),
## ma() and qsave() are used below but no package is loaded in this file, so
## they are presumably attached by a calling script - confirm.
rm(list=ls())
gc()


## Path to the age-specific death rate extract (CSV) from ABS.Stat; this is
## the file handed to fread() in the read step below.
abs_deaths_path <- "./Input data/Intermediate input data/ABS DEATHS_AGESPECIFIC_OCCURENCEYEAR_04082021102210575.csv"



# Read ABS death data ----------------------------------------------------

## Read the ABS extract from disk.
## `file =` is passed explicitly rather than relying on fread()'s positional
## `input` argument: data.table's fread() inspects `input` and, when the string
## contains a space and no such file exists, treats it as a shell command.
## This path contains spaces, so a missing file would be executed instead of
## raising a clear "file not found" error.
## NOTE(review): assumes fread() here is data.table::fread - confirm.
abs_deaths_csv <- fread(file = abs_deaths_path)



# Create mortality rate data for model -----------------------------------------

## Build the mortality-rate lookup for the model from the 2019 age-specific
## death rates (reported per 1,000 in the source data).
## Output columns: age, age_grp (5-year band), mortality_year (position of the
## age within its band, 1..5) and mortality_rate (smoothed per-capita rate).
## NOTE(review): the smoothing and tail fill below assume exactly one row per
## single year of age 0-100 after filtering (i.e. the extract holds a single
## sex / region / measure) - confirm against the CSV.
abs_deaths <- abs_deaths_csv %>%
  filter(Time == 2019, Age != "All ages") %>%
  select(age = Age, death_rate = Value) %>%
  mutate(
    ## age label -> its first run of digits, as a number
    age = age %>% str_extract("[[:digit:]]+") %>% as.numeric(),
    ## rate per 1,000 -> rate per capita
    death_rate = death_rate / 1000,
    ## 5-year age bands: [0,5), [5,10), ..., [100,105]
    age_grp = cut(
      age,
      breaks = seq(0, 105, 5),
      right = FALSE,
      include.lowest = TRUE,
      ordered_result = TRUE
    )
  ) %>%
  ## Rolling 5-year average of the death rate, so deaths in the model change
  ## smoothly over time instead of spiking when a cohort enters a new age
  ## group. ma(., 5) followed by lead(2) is used so the value stored at age a
  ## is the average over ages a..a+4.
  ## NOTE(review): presumably forecast::ma (centred moving average) - confirm.
  arrange(age) %>%
  mutate(rolling_mortality = ma(death_rate, 5) %>% lead(2)) %>%
  ## The last four ages have no full 5-year window ahead of them, so average
  ## over the ages that remain. Rows are selected by age value rather than by
  ## row position, so the fill cannot silently pick up the wrong ages.
  mutate(rolling_mortality = case_when(
    age == 97 ~ sum(death_rate[age %in% 97:100]) / 4,
    age == 98 ~ sum(death_rate[age %in% 98:100]) / 3,
    age == 99 ~ sum(death_rate[age %in% 99:100]) / 2,
    age == 100 ~ death_rate,
    TRUE ~ rolling_mortality
  )) %>%
  ## "Year" within the age group: the first year a cohort spends in a band
  ## uses that band's year-1 rate, the second year the year-2 rate, and so on.
  group_by(age_grp) %>%
  mutate(mortality_year = row_number()) %>%
  ungroup() %>%
  ## keep only the variables the model needs
  select(age, age_grp, mortality_year, mortality_rate = rolling_mortality)
  

# death_rates_plot <- ggplot(abs_deaths) +
#   geom_col(aes(x=age, y=mortality_rate))


## Write the finished mortality-rate table to disk as a .qs file
qsave(x = abs_deaths, file = "./Input data/mortality_rates_ay.qs")